# ANES_coding_forImputedDatasets.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file is for use after datasets have been created by 
# ANES_create_imputed.R.  

library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))   # for rescale(), split_fac(), sumNA()
library(car)       # for Recode()
library(haven)     # for read_dta()
library(stringr)   # for str_pad()
library(tools)     # for toTitleCase()

# Helper scripts, sourced in this order.  Code later in this file uses
# CSLdata, mergeStateControlVars(), qw(), and %IN%, none of which is defined 
# in this file; which script or package provides each is not visible from here.
source('CSL_coding.R')                        # presumably creates CSLdata -- TODO confirm
source('ANES_stateYoungAugmentation.R')
source('functions/mergeStateControlVars.R')   # presumably defines mergeStateControlVars() -- TODO confirm
source('functions/NES_StateRecode.R')

# ANES file; selected columns of it are merged into each imputed dataset below.
ANES <- readRDS('data/ANES_withMergedCSLs.RDS')

# Each .RData file is loaded into its own environment so that the objects 
# stored in the two files cannot overwrite each other or anything in the 
# global workspace.
ANESGuarantee <- new.env()
load('data/ANESImputedDatasetGuarantee.RData', envir = ANESGuarantee)

ANESHealth <- new.env()
load('data/ANESImputedDatasetHealth.RData', envir = ANESHealth)



#######################################################
# CODE REQUIRED VARIABLES
#######################################################

# VARIABLES NEEDED FOR DATA MERGING, IV CODING, AND CONTROL 
# For each dependent variable: take the corresponding list of imputed 
# datasets, add recoded variables to every imputed dataset, merge in extra 
# ANES columns, state-level controls, and the CSL variable "CA", and save the 
# resulting list to data/ANESImputedDataset<Depvar>_coded.RDS.
for (depvar in c('guarantee', 'health')) {
  # Select the list of imputed datasets for this dependent variable.  The 
  # final, unnamed switch() argument is the fallback: fail loudly rather than 
  # silently reuse the imputations from a previous iteration.
  imputations <- switch(
    depvar,
    guarantee = ANESGuarantee$guarantee.out$imputations,
    health    = ANESHealth$health.out$imputations,
    stop('Unknown dependent variable: ', depvar, call. = FALSE))

  # seq_along() rather than 1:length(): with an empty list, 1:length() would 
  # iterate over c(1, 0) and fail on imputations[[1]].
  for (imp in seq_along(imputations)) {
    imputedDataset                 <- imputations[[imp]]
    imputedDataset$ID.unique       <- imputedDataset$respondentID  # merge key shared with ANES
    imputedDataset$yearInt.fac     <- ordered(Recode(imputedDataset$yearInt, '1994 = 1996'))
    imputedDataset$age.fac         <- Recode(ordered(imputedDataset$age), '17:18 = 18')

    # Ten-year bins for yearYoung.  The intervals are closed on the left 
    # (right = FALSE), so, e.g., 1890 falls in '1890-99'.
    imputedDataset$yearYoung.fac   <- cut(
      imputedDataset$yearYoung, 
      breaks         = c(1800, seq(1890, 2010, by = 10)), 
      right          = FALSE,
      labels         = c(
        '1800-89', '1890-99', '1900-09', '1910-19', '1920-29', '1930-39', 
        '1940-49', '1950-59', '1960-69', '1970-79', '1980-89', '1990-99', 
        '2000-2008'),
      ordered_result = TRUE)

    imputedDataset$yearIntNorm     <- imputedDataset$yearInt   - 1991  # GSS median is 1991
    imputedDataset$yearYoungNorm   <- imputedDataset$yearYoung - 1962  # GSS median is 1962
    imputedDataset$YOB             <- imputedDataset$yearInt - imputedDataset$age
    imputedDataset$YOB.1900plus    <- imputedDataset$YOB >= 1900
    imputedDataset$YOB.limit       <- imputedDataset$YOB >= 1900 & imputedDataset$YOB <= 1980
    imputedDataset$educ            <- pmin(imputedDataset$educ, 13)    # top-code educ at 13

    # Convert educ5Level to an ordered factor once; the three comparisons 
    # below depend on the order of its levels.
    imputedDataset$educ5Level      <- ordered(imputedDataset$educ5Level)
    imputedDataset$HSgrad          <- imputedDataset$educ5Level >= 'HSdiploma'
    imputedDataset$college         <- imputedDataset$educ5Level >= 'BA or higher'
    imputedDataset$collegeAttended <- imputedDataset$educ5Level >= 'some college'
    imputedDataset$black           <- imputedDataset$race == 'black'
    imputedDataset$white           <- imputedDataset$race == 'white'
    #imputedDataset$religion        <- car::Recode(imputedDataset$religion, '0=NA; 1="Protestant"; 2="Catholic"; 3="Jewish"; 4="other"', as.factor=TRUE)
    #imputedDataset$religion        <- relevel(imputedDataset$religion, 'Protestant')  # make "Protestant" the reference category
    
    # Merge extra ANES columns into the imputed dataset.  Every row of the 
    # imputed dataset is kept (all.x = TRUE); ANES rows without a match are 
    # dropped (all.y = FALSE).  
    # NOTE(review): this assumes that (ID.unique, yearInt) identifies at most
    # one row of ANES; duplicated keys would duplicate rows here -- confirm.
    imputedDataset <- merge(
      imputedDataset, 
      ANES[, c('ID.unique', 'yearInt', 'VCF0132', 'VCF0133', 'VCF0142', 'VCF0901B', 'VCF0112')], 
      by    = c('ID.unique', 'yearInt'), 
      all.x = TRUE, 
      all.y = FALSE, 
      sort  = FALSE)
  
    # SEGREGATION-RELATED MEASURES
    # %IN% is not defined in this file (presumably provided by the Bullock
    # package -- TODO confirm).
    imputedDataset$blackPostBrown <- 
      imputedDataset$race       == 'black' & 
      imputedDataset$yearYoung  >= 1958    & 
      imputedDataset$stateYoung %IN% c(
        'AL', 'AR', 'DE', 'DC', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'OK', 
        'SC', 'TN', 'TX', 'VA', 'WV')
    imputedDataset$MSDuringRepeal <- 
      imputedDataset$race       ==   'white'   &
      imputedDataset$yearYoung  %IN% 1957:1982 & 
      imputedDataset$stateYoung ==   'MS' 
    imputedDataset$SCDuringRepeal <- 
      imputedDataset$race       ==  'white'   &
      imputedDataset$yearYoung %IN% 1956:1971 & 
      imputedDataset$stateYoung ==  'SC'
    # The race condition is already part of both components; it is repeated 
    # here only to keep the definition explicit.
    imputedDataset$duringRepeal   <- 
      (imputedDataset$MSDuringRepeal | imputedDataset$SCDuringRepeal) & 
      imputedDataset$race == 'white' 


    
    ##########################################################################
    # ADD STATE-LEVEL CHARACTERISTICS TO THE IMPUTED DATASET
    ##########################################################################
    # The columns returned by mergeStateControlVars() are appended by 
    # position, so they must be computed from the post-merge dataset, as they
    # are here.  dplyr:: is spelled out because this file does not attach 
    # dplyr itself.
    imputedDataset <- dplyr::bind_cols(
      imputedDataset, 
      mergeStateControlVars(
        imputedDataset$stateYoung, 
        imputedDataset$yearYoung))
  

    
    ##############################################################
    # ADD THE CSL VARIABLE "CA" TO THE IMPUTED DATASET
    ##############################################################
    # Match each respondent's (stateYoung, yearYoung) to the (state, year) 
    # rows of CSLdata.  qw() is not defined in this file (presumably it splits 
    # the string into a character vector of column names -- TODO confirm).
    imputedDataset <- dplyr::left_join(
      x  = imputedDataset, 
      y  = CSLdata[, qw("state year CA")],
      by = c("stateYoung" = "state", "yearYoung" = "year"))
    # Three bins for CA: (-100, 7], (7, 10], and (10, 100].
    imputedDataset$CA.fac <- cut(imputedDataset$CA, c(-100, 7, 10, 100))
      


    ##########################################################################
    # OVERWRITE THE ORIGINAL IMPUTED DATASET
    ##########################################################################
    imputations[[imp]] <- imputedDataset  
  }


  #############################################################
  # SAVE CSL-MERGED FILES FOR IV ANALYSIS
  #############################################################
  # toTitleCase() turns 'guarantee' / 'health' into 'Guarantee' / 'Health', 
  # matching the capitalization used in the input file names.
  saveRDS(
    object = imputations, 
    file   = paste0('data/ANESImputedDataset', toTitleCase(depvar), '_coded.RDS'))
}

